For this report, I used the epuRate’s template of Yan Holtz who made it publicly available.
The IHME Development Assistance for Health Database lists grants and loans provided to developing countries by other countries to combat diseases such as HIV, Malaria and other health problems. The file contains data from 1990 to 2019.
In this project, I will use my skills on cartography using cloropleth maps, cartograms and map animation.
The goal of the first question is to show the variation of dollars spent by country in 2019. We see that there are great disparities between countries. The United States, Germany and England are the three countries that have given the most money. They gave at least 6 times more money than Spain for example. We also see that the number of countries giving money is not very large. Obviously, countries with large GDP have more means to give money than small countries. It would be interesting to have the percentage of expenditure that this represents for each country.
# Import Data
ihme <- read_csv("data/IHME_DAH_DATABASE_1990_2019_Y2020M04D23.CSV", na = c(NA,"-",""))
# Prepare data for total expenditure in 2019
tot_spending_2019 <- ihme %>%
filter(year == 2019) %>%
select(source, dah_19) %>%
group_by(source) %>%
summarise(expenditure = sum(dah_19)) %>%
ungroup()
# add the 3-letter code for each country
tot_spending_2019 <- tot_spending_2019 %>%
mutate(source_code = countrycode::countrycode(sourcevar = source,
origin = "country.name",
destination = "iso3c"))
# Import shapefiles (X)
world <- st_read("data/naturalearth/ne_110m_admin_0_countries.shp")
# Joining the files
world_2019 <- world %>%
left_join(tot_spending_2019, c("ISO_A3" = "source_code"))
# Create categories function
quantiles <- function(data) {
data %>%
pull(expenditure) %>%
quantile(probs = c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE)
}
# Categories
quantile_spending_2019 <- quantiles(world_2019)
# Reformating the labels function
labels <- function(quantiles) {
tibble(
lab1 = quantiles,
lab2 = c(quantiles[2:length(quantiles)], NA)) %>%
slice(1:n() - 1) %>% # We remove the last row, since it has no meaning
mutate_all(round, digits = 0) %>% # We remove digits after the 0
mutate_all(format, big.mark = "'") %>%
mutate_all(paste0, "$") %>% # We add the dollar sign after the digits
mutate(labs = paste(lab1, lab2, sep = "-"))
}
# Labels
labels_spending <- labels(quantile_spending_2019)
# Includes categories in the shapefile with correct labels function
to_category <- function(data, quantiles, labels) {
data %>%
mutate(quantiles = cut(expenditure,
breaks = quantiles,
labels = labels$labs,
include.lowest = TRUE))
}
world_2019 <- to_category(world_2019, quantile_spending_2019, labels_spending)
# Change the projection
world_robin_2019 <- st_transform(world_2019, crs = 'ESRI:54030')
# Plot the map
ggplot(data = world_robin_2019)+
geom_sf(mapping = aes(fill = quantiles)) +
scale_fill_carto_d(type = quantitative, palette = "BurgYl") +
guides(fill = guide_legend(label.position = "bottom")) +
theme_void() +
theme(legend.position = "bottom") +
labs(
title = "Total expenditures per country for health aid",
subtitle = "in 2019",
caption = "Source: Eurostat",
fill = "" )And what about the other side, i.e. the receivers? Who do you think got the most aids? Unfortunately, we don’t have the data for 2019. So we have to go back a little bit. Let’s look at what happened in 2015.
# Prepare data for total expenditure in 2015
tot_earnings_2015 <- ihme %>%
filter(year == 2015) %>%
select(recipient_isocode, dah_19) %>%
group_by(recipient_isocode) %>%
summarise(expenditure = sum(dah_19), na.rm = TRUE) %>%
ungroup()
# Joining the files
world_2015 <- world %>%
left_join(tot_earnings_2015, c("ISO_A3" = "recipient_isocode"))
# Create categories
quantile_earnings <- quantiles(world_2015)
# Reformating the labels
labels_earnings <- labels(quantile_earnings)
# Includes categories in the shapefile with correct labels
world_2015 <- to_category(world_2015, quantile_earnings, labels_earnings)
# Change the projection
world_robin_2015 <- st_transform(world_2015, crs = 'ESRI:54030')
# Plot the map
ggplot(data = world_robin_2015)+
geom_sf(mapping = aes(fill = quantiles)) +
scale_fill_carto_d(type = quantitative, palette = "BurgYl") +
guides(fill = guide_legend(label.position = "bottom")) +
theme_void() +
theme(legend.position = "bottom") +
labs(
title = "Total funds received per country for health aid",
subtitle = "in 2015",
caption = "Source: Eurostat",
fill = "" )Only 3 countries received financial support in 2015. The largest grant was to the Democratic Republic of Congo. I know that 2015 was exactly in the middle of the Ebola virus. However, the data does not seem to show this kind of disease. Although the data would allow us to analyse this in more depth, let’s try to get a more global view.
We would indeed like to have an overview of financial aid by major regions, rather than by country. To do this, we need to redefine the shape of our map. Fortunately, there is a function to merge the different countries.
# Extract the name of the gdb_regions
gbd_region_name <- ihme %>%
distinct(gbd_region) %>%
filter(gbd_region != "Europe, Western",
gbd_region != "Global",
gbd_region != "Unallocated/Unspecified")
# create lists of all country codes within a region
region <- ihme %>%
group_by(gbd_region) %>%
distinct(recipient_isocode) %>%
summarise(recipient_isocode = list(recipient_isocode)) %>%
filter(gbd_region != "Europe, Western",
gbd_region != "Global",
gbd_region != "Unallocated/Unspecified")
# Create an empty list
gbd_region_sf <- list()
# for each region, Populate the empty list with all rows from "world" which corresponds to the country of the region
for (i in 1:nrow(region)) {
gbd_region_sf[[i]] <- world %>%
filter(ISO_A3 %in% as_vector(region[[2]][i]))
}
# Populate the gbd_region_sf with one last region, those who never received aid or are not part of the subregion from above.
other_region <- ihme %>%
group_by(gbd_region) %>%
distinct(recipient_isocode)
gbd_region_sf[[19]] <- world %>%
filter(!(ISO_A3 %in% other_region$recipient_isocode))
# Give the names of the region to the new lists
names(gbd_region_sf) <- gbd_region_name$gbd_region
names(gbd_region_sf)[[19]] <- "other_country"
# Extract the lists in order to have a data frame for each region in the global environment
list2env(gbd_region_sf, .GlobalEnv)
# clipping all the regions to have a single geometry
asia_pacific <- st_union(`Asia Pacific, high-income`)
asia_central <- st_union(`Asia, Central`)
asia_south <- st_union(`Asia, South`)
asia_east <- st_union(`Asia, East`)
asia_southeast <- st_union(`Asia, Southeast`)
latin_america_andean <- st_union(`Latin America, Andean`)
latin_america_central <- st_union(`Latin America, Central`)
latin_america_south <- st_union(`Latin America, Southern`)
latin_america_tropical <- st_union(`Latin America, Tropical`)
north_africa_middle_east <- st_union(`North Africa/Middle East`)
oceania <- st_union(Oceania)
sub_saharan_africa_central <- st_union(`Sub-Saharan Africa, Central`)
sub_saharan_africa_south <- st_union(`Sub-Saharan Africa, Southern`)
sub_saharan_africa_east <- st_union(`Sub-Saharan Africa, Eastern`)
sub_saharan_africa_west <- st_union(`Sub-Saharan Africa, Western`)
europe_central <- st_union(`Europe, Central`)
europe_east <- st_union(`Europe, Eastern`)
caribbean <- st_union(Caribbean)
other_country <- st_union(other_country)
# Create a new empty shapefile
nrows <- 19
geometry = st_sfc(lapply(1:nrows, function(x) st_geometrycollection()))
df <- st_sf(id = 1:nrows, geometry = geometry)
# Populate the new shapefile
df$geometry <- c(asia_pacific, asia_south, asia_central, asia_east, asia_southeast ,latin_america_andean , latin_america_central, latin_america_south , latin_america_tropical, north_africa_middle_east, oceania , sub_saharan_africa_central, sub_saharan_africa_south, sub_saharan_africa_east, sub_saharan_africa_west, europe_central, europe_east, caribbean, other_country)
# Select the total of money spent
tot_earnings <- ihme %>%
group_by(gbd_region) %>%
summarise(expenditure = sum(dah_19, na.rm = TRUE)) %>%
filter(gbd_region != "Europe, Western",
gbd_region != "Global",
gbd_region != "Unallocated/Unspecified") %>%
rbind(c("other_country", NA)) %>%
mutate(expenditure = as.numeric(expenditure))
df$expenditure <- tot_earnings$expenditure
# Create quantiles
quantile_tot <- quantiles(df)
# Reformating the labels
labels_earnings_tot <- labels(quantile_tot)
# Includes categories in the shapefile with correct labels
df <- to_category(df, quantile_tot, labels_earnings_tot)
# Change the projection
world_robin_tot <- st_transform(df, crs = 'ESRI:54030')
# Plot the map
ggplot(data = world_robin_tot)+
geom_sf(mapping = aes(fill = quantiles)) +
scale_fill_carto_d(type = quantitative, palette = "BurgYl") +
guides(fill = guide_legend(label.position = "bottom")) +
theme_void() +
theme(legend.position = "bottom") +
labs(
title = "Total funds received per country for health aid",
subtitle = "from 1990 to 2019",
caption = "Source: Eurostat",
fill = ""
)In the period from 1990 to 2019, North Africa, South Africa and South Asia are the regions that have received the most financial support. In contrast, North America, Europe and Australia have not received any aid in the last 30 years.
What if we want to see the changes over the years? Let’s try to create an animated map.
# Prepare data for total expenditure
tot_earnings <- ihme %>%
select(recipient_isocode, dah_19, year) %>%
group_by(recipient_isocode, year) %>%
summarise(expenditure = sum(dah_19),) %>%
ungroup()
# Creating the world dataframes for each year
years <- c(1990:2019)
for (i in years) {
assign(paste0("world", i), world %>% mutate( year = i))
}
#Binding dataframes together
world_year <- bind_rows(world1990,
world1991,
world1992,
world1993,
world1994,
world1995,
world1996,
world1997,
world1998,
world1999,
world2000,
world2001,
world2002,
world2003,
world2004,
world2005,
world2006,
world2007,
world2008,
world2009,
world2010,
world2011,
world2012,
world2013,
world2014,
world2015,
world2016,
world2017,
world2018,
world2019)
# Joining the files
world_year <- world_year %>%
left_join(tot_earnings, c("ISO_A3" = "recipient_isocode", "year" = "year"))
# Create categories
quantile_earnings <- quantiles(world_year)
# Reformating the labels
labels_earnings <- labels(quantile_earnings)
# Includes categories in the shapefile with correct labels
world_year <- to_category(world_year, quantile_earnings, labels_earnings)
# Change the projection
world_eck_year <- st_transform(world_year, crs = 'ESRI:54013')
world_eck_year <- world_eck_year %>%
st_simplify()
# Plot the map
ggplot()+
geom_sf(data = world_eck_year,
mapping = aes(fill = quantiles)) +
scale_fill_carto_d(type = quantitative, palette = "BurgYl") +
guides(fill = guide_legend(label.position = "bottom")) +
theme_void() +
theme(legend.position = "bottom") +
labs(
title = "Total funds received per country for health aid",
subtitle = "{closest_state}",
caption = "Source: Eurostat",
fill = "" ) +
transition_states(year, state_length = 10, transition_length = 10)